
# ------------------------------------------------------------------------------
#
# Code for MLSC Occupation-Cognition Study
# Prepared Spring, 2024
#
# Load workspace S1.RData by double-clicking on it or opening it from within
# R's native console or your preferred R IDE
#
# The following code reproduces the figures for the manuscript
#
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
# Load libraries, specify data types
# ------------------------------------------------------------------------------

# libs
library(lavaan)
library(RColorBrewer)
library(ggplot2)
library(gridExtra)
library(ggtext)
library(ggpubr)
library(ggrepel)
library(colorspace)

# renumber homemakers & unemployed
levels(d$SOC2)
levels(d$SOC2)[1] = "SOC.0h"
levels(d$SOC2)[2] = "SOC.0u"


# ------------------------------------------------------------------------------
# Re-estimate factor scores if needed... 
# ------------------------------------------------------------------------------

m.COG.i     = 'COG.i  =~ fsMVIS.i  + fsSPD.i  + fsRSN.i'
m.COG.ls    = 'COG.ls =~ fsMVIS.ls + fsSPD.ls + fsRSN.ls'
fit.COG.i  = cfa(m.COG.i,  data = d, missing = "ML")	
fit.COG.ls = cfa(m.COG.ls, data = d, missing = "ML")
d$COG.i  = as.numeric(lavPredict(fit.COG.i))
d$COG.ls = as.numeric(lavPredict(fit.COG.ls))


# ------------------------------------------------------------------------------
# Summaries by occupational class - needed for longitudinal trajectories
# ------------------------------------------------------------------------------

icogI  = c("fsSPD.i", "fsRSN.i", "fsMVIS.i", "COG.i")
icogLS = c("fsSPD.ls", "fsRSN.ls", "fsMVIS.ls", "COG.ls")
i_cog = c(icogI, icogLS)

# summaries
get_smrystats_bygrp <- function(d, outc, grp)
    {
	Ns  = by(d[,outc], d[,grp], function(x) apply(x,2,function(y) 
		     length(!is.na(y))))
    Ns = data.frame(do.call("rbind", Ns))
	names(Ns) = paste(names(Ns), ".n", sep = '')
    mus = by(d[,outc], d[,grp], colMeans, na.rm=T)
    mus = data.frame(do.call("rbind", mus))
	names(mus) = paste(names(mus), ".mu", sep = '')
    sds = by(d[,outc], d[,grp], function(x) apply(x,2,sd, na.rm=T))
    sds = data.frame(do.call("rbind", sds))
	names(sds) = paste(names(sds), ".sd", sep = '')
	i = order(c(1:ncol(mus), 1:ncol(sds), 1:ncol(Ns)))  
	smry = data.frame(cbind(mus,sds,Ns)[,i])
	names(smry) = gsub("FS_", "", names(smry))
	names(smry) = gsub("fs", "", names(smry))
    smry
	}


## Skill
SK_MuSD = get_smrystats_bygrp(d, i_cog, "SOC_Skill")			
SK_MuSD.cog = SK_MuSD[,grep("COG", names(SK_MuSD))]

# Distances by skill level in average COG.i, COG.s 
dif.i  = max(SK_MuSD.cog$COG.i.mu)  - min(SK_MuSD.cog$COG.i.mu)
dif.ls = max(SK_MuSD.cog$COG.ls.mu) - min(SK_MuSD.cog$COG.ls.mu)
dif.i; dif.ls; dif.i/dif.ls

## SOC 2
C2_MuSD = get_smrystats_bygrp(d, i_cog, "SOC2")			
C2_MuSD$SOC = as.factor(row.names(C2_MuSD))
C2_MuSD.cog = C2_MuSD[,grep("COG", names(C2_MuSD))]

# Distances between SOC2 groups' average COG.i, COG.s 
dif.i  = max(C2_MuSD.cog$COG.i.mu)  - min(C2_MuSD.cog$COG.i.mu)
dif.ls = max(C2_MuSD.cog$COG.ls.mu) - min(C2_MuSD.cog$COG.ls.mu)
dif.i; dif.ls; dif.i/dif.ls


# ------------------------------------------------------------------------------
# Fixed parameter estimates from analyses w/scaled scores
# ------------------------------------------------------------------------------

fixests = data.frame(var = c("RSN", "MVIS", "SPD"),
	                 i   = c(0.018150,  -0.288410, -0.161455),
	                 ls  = c(-0.143839, -0.545619, -0.464992),
	                 qs  = c(-0.111579, -0.085395, -0.069095)
                     )
fixests = rbind(fixests, c("COG", round(colMeans(fixests[,2:4]),6)))


# get grand mean (fixed) trajectories for a given range of ages
get_fixed_atages <- function(ages, mus) 
    {
	ctr_age = 70    # age was centered at 70y
	scl_age = .1    # rescale from decades to years
	lvl.mu = as.numeric(mus["i"][[1]])
	lsl.mu = as.numeric(mus["ls"][[1]])
	qsl.mu = as.numeric(mus["qs"][[1]])
	scrs = sapply(ages, function(AGE) 
	    {
		fix_mu = lvl.mu + 
			     lsl.mu*(scl_age*(AGE-ctr_age)) +
			     qsl.mu*((scl_age*(AGE-ctr_age))^2)
		})	
	names(scrs) = ages
	scrs
    }

# trajectories all groups
ages = seq(45,90,1)

# average curves (fixed effects)
cog_fixed_ages <- apply(fixests, 1, function(x)
    {
	get_fixed_atages(ages, x[2:4])
	})
cog_fixed_ages = data.frame(cog_fixed_ages)
names(cog_fixed_ages) = c("RSN", "MVIS", "SPD", "COG")


# ------------------------------------------------------------------------------
# Group-specific deviations (mean individual ranefs per group)
# ------------------------------------------------------------------------------

get_random_atages <- function(ages, mus)
    {
	ctr_age = 70    # age was centered at 70y
	scl_age = .1    # rescale from decades to years
	lvl.mu = mus["i"]
	lsl.mu = mus["ls"]	
	scrs = sapply(ages, function(AGE) 
	    {
		rnd_mu = lvl.mu + lsl.mu*(.1*(AGE-ctr_age))
		})	
	names(scrs) = ages
	scrs
	}
												  
randests = apply(C2_MuSD, 1, function(x)
    {
    list("RSN"  = list("mus" = c("i" = as.numeric(x["RSN.i.mu"] ),  
    	                         "ls" = as.numeric(x["RSN.ls.mu"]))),
	     "MVIS" = list("mus" = c("i" = as.numeric(x["MVIS.i.mu"]),  
	     	                     "ls" = as.numeric(x["MVIS.ls.mu"]))),
	     "SPD"  = list("mus" = c("i" = as.numeric(x["SPD.i.mu"] ),  
	     	                     "ls" = as.numeric(x["SPD.ls.mu"]))),
	     "COG"  = list("mus" = c("i" = as.numeric(x["COG.i.mu"] ),  
	     	                     "ls" = as.numeric(x["COG.ls.mu"])))
    	         )
	})

cog_random_ages <- lapply(randests, function(x)
    {
	lapply(x, function(y)
	    {
		get_random_atages(ages, y$mus)
		})
	})


# ------------------------------------------------------------------------------
# Combine the fixed trajectories and group deviation trajectories
# ------------------------------------------------------------------------------

cog_ests_ages <- lapply(names(cog_random_ages), function(o)
    {
	x = cog_random_ages[[o]]
	d = data.frame("RSN"  = cog_fixed_ages$RSN  + x$RSN,
	               "MVIS" = cog_fixed_ages$MVIS + x$MVIS,
	               "SPD"  = cog_fixed_ages$SPD  + x$SPD,
		           "COG"  = cog_fixed_ages$COG  + x$COG
                   )
	d$SOC = as.factor(o)
	d$Age = as.integer(row.names(d))
	d
	})
cog_ests_ages = do.call("rbind", cog_ests_ages)
cog_ests_ages$SOC = gsub("SOC\\.", "C", cog_ests_ages$SOC)


# ------------------------------------------------------------------------------
# What groups ended up worse than where they started?
# ------------------------------------------------------------------------------

t0 = cog_ests_ages[cog_ests_ages$Age == 45, c("SOC", "COG")]
t1 = cog_ests_ages[cog_ests_ages$Age == 90, c("SOC", "COG")]
t0 = t0[order(t0$COG, decreasing = T),]
t1 = t1[order(t1$COG, decreasing = T),]
t0$t0rank = c(1:nrow(t0))
t1$t1rank = c(1:nrow(t1))
trnks = merge(t0, t1, by = "SOC")[,-c(2,4)]
trnks[order(trnks$t0rank),]


# ------------------------------------------------------------------------------
# Labels for plotting
# ------------------------------------------------------------------------------

SOC = unique(cog_ests_ages$SOC); SOC  # already ordered

SOC_lbl = c("C00 Homemakers (+)", 
            "C00 Unemployed (+)", 
            "C11 Corporate Management (-)", 
            "C12 Agriculture/Service Mgmt.", 
            "C21 Science & Technology Prof.", 
            "C22 Healthcare Prof.", 
            "C23 Teaching & Research", 
            "C24 Business & Public Service", 
            "C31 Science & Technology Assoc. (-)", 
	        "C32 Health & Social Welfare (+)", 
            "C33 Protective Services (-)", 
            "C34 Culture, Media, & Sports (+)", 
            "C35 Business & Public Service", 
            "C41 Administrative (+)", 
            "C42 Secretarial (+)", 
            "C51 Agricultural Trades", 
            "C52 Metal & Electrical Trades (-)", 
            "C53 Construction & Building (-)", 
	        "C54 Textile & Printing (-)", 
            "C61 Personal Care", 
            "C62 Personal Leisure", 
            "C71 Sales", 
            "C72 Customer Service", 
            "C81 Process & Machine Operation (-)", 
            "C82 Industrial Transport/Driving", 
            "C91 Elementary Trades", 
            "C92 Elementary Admin/Service (+)" 
            )

SOCs = cbind(SOC, SOC_lbl)
cog_ests_ages = merge(cog_ests_ages, SOCs, by = "SOC", all.x = T)
cog_ests_ages = cog_ests_ages[order(cog_ests_ages$SOC, cog_ests_ages$Age),]
cog_ests_ages$SOC = as.factor(cog_ests_ages$SOC)
levels(cog_ests_ages$SOC)


# ------------------------------------------------------------------------------
# Set up colors and fonts
# labels/Numbers: Helvetica Neue 57 condensed roman
# ordinate/abscissa labels 10pt; xy numbers 9pt; keys 9pt
# ------------------------------------------------------------------------------

# https://colorkit.co/palettes/9-colors+rainbow/
cols = c("#666666", "#008080", "#004e7d",  "#295235", "#39a35f", 
	  	 "#61a6cf", "#5b4e3e", "#eaba5e",  "#e7724a", "#b03b4d")
	
idrk1 = c(5,6)
idrk2 = c(2,3,8,9)
ilgh1 = c(4,7)
cols[idrk1] = darken(cols[idrk1],  amount = .13)
cols[idrk2] = darken(cols[idrk2],  amount = .18)
cols[ilgh1] = lighten(cols[ilgh1], amount = .13)

theme_set(theme_classic())

ttl.axis.sz = 13
txt.axis.sz = 11
txt.sz      = 11


# ------------------------------------------------------------------------------
# Density (violin) plots for COG
# ------------------------------------------------------------------------------

ybr.i = seq(-2.5, 2.5, by = .5)
ybr.s = round(seq(-.3, .3, by = .1), 1)

get_dens <- function(dat, yname, yvar, grp, grplbl, cols, ybr)
    {
    p = ggplot(data = dat, aes(x=factor(grp), y=yvar, 
                               fill=grp, pattern_fill=grp)) +
               geom_violin(draw_quantiles = c(0.25, 0.75),
                           linetype = "dashed", alpha = .55) +
               geom_violin(fill="transparent",draw_quantiles = 0.5) +
               scale_fill_manual(values=cols) +	
               scale_y_continuous(breaks = ybr,
                                  name = yname) +
			   ylab(yname) + 
               xlab(grplbl) + 
               labs(fill="") +
               theme(axis.title = element_text(size = ttl.axis.sz), 
               	     axis.title.y = ggtext::element_markdown(lineheight=1.3),
              	     axis.text = element_text(size = txt.axis.sz),
               	     text = element_text(size = txt.sz)
              	     )
    }

# colors based on skill
igr1 = c(1,10,9,5,3) 

# colors based on skill levels, mapped to SOC2
igr2 = c(3,5,3,3,3,3,5,5,5,5,5,9,9,5,5,5,5,9,9,9,9,9,9,10,10,1,1)


plt1 = get_dens(d, "Intercept of General Cognition<br>
	                (*individual deviation scores*)", 
	                d[,"COG.i"], d[,"SOC_Skill"], 
	               "Occupational Skill Level", cols[igr1], ybr.i)

plt2 = get_dens(d, "Slope of General Cognition<br>
	                (*individual deviation scores*)", 
	                d[,"COG.ls"], d[,"SOC_Skill"], 
	               "Occupational Skill Level", cols[igr1], ybr.s)

# Figure 1
fig1 = ggarrange(plt1, plt2,
				 ncol=2, nrow=1,
				 label.x = .3, font.label = list(face="plain")
				 ,common.legend = TRUE, legend = FALSE
				 )
fig1


# ------------------------------------------------------------------------------
# Trajectories for COG
# ------------------------------------------------------------------------------

get_plot <- function(dat, alpha, cols, nx, ny, psiz, lblpd = .25, xcut)
    {
    ybr = seq(-3, 1.5, by = .25)
    xbr = seq(50, 90, by = 5)
    minX = 50
    maxX = 160
    minY = -1.75
    maxY = .75
    last = dat[which(dat$Age == 90),]
    ggplot(dat, aes(x=Age, y=COG, group=SOC)) + 
              geom_line(aes(col=SOC), lwd = 0.5, alpha = alpha)	+
              geom_text_repel(aes(label = SOC_lbl, col=SOC), 
                              data = last, 
              				  fontface = "plain", 
              	              alpha = 1.0,
              				  size = c(4), 
              	              segment.colour = "#DDDDDD",
              				  nudge_x = nx,
              	              nudge_y = ny,
              				  max.overlaps = 25,
              	              point.size = psiz,
                              direction = c("y")
              				  ) +
              scale_color_manual(values=cols) + 
              scale_fill_manual(values=cols) + 
              theme(legend.position = "none", 
                    axis.title = element_text(size = ttl.axis.sz), 
              	    axis.text = element_text(size = txt.axis.sz),
              	    text = element_text(size = txt.sz)
              	    ) +
              scale_x_continuous(breaks = xbr, 
                                 labels = xbr, 
                                 name = "Age in Years",
                                 limits = c(minX, maxX)) + 
              scale_y_continuous(breaks = ybr,
                                 # labels = ybreaks_labels, 
                                 name = "General Fluid Cognition", 
                                 limits = c(minY, maxY)) +
              coord_cartesian(xlim = c(55,xcut), ylim = c(minY, maxY))  
    }

# colors based on skill levels, mapped to SOC2
igr3 = c(1,1,3,5,3,3,3,3,5,5,5,5,5,9,9,5,5,5,5,9,9,9,9,9,9,10,10)

# Figure 2
fig2 = get_plot(cog_ests_ages, alpha = .7, cols[igr3], nx=10, ny=0.2, psiz=.5, 
	     lblpd = .5, xcut=107)
fig2
